//Do-file to create dataset for the WLS results
//Last changed by SAO 250109

clear
set more off

***MEDIATORS***
use "E:\ProjData\PGS and politics\WLSdata\WLS_Mediators.dta", clear

***Depression
*CESD battery from 5th wave: items z_iu004rer through z_iu023rer

*Build the list of the 20 item names once (zero-padded to three digits)
local cesd_items
forvalues k = 4/23 {
    local padded : display %03.0f `k'
    local cesd_items `cesd_items' z_iu`padded'rer
}

*Codes -3 to -1 are treated as missing
foreach item of local cesd_items {
    replace `item' = . if inrange(`item', -3, -1)
}

*Reverse code non-depression items (x -> 7 - x)
foreach item in z_iu006rer z_iu009rer z_iu016rer z_iu018rer {
    replace `item' = 7 - `item'
}

*Recode to 0-1 range (divide by 7) and tag each item with the dep_ prefix
foreach item of local cesd_items {
    replace `item' = `item'/7
    rename `item' dep_`item'
}

*Row mean over the renamed items only; rowmean() ignores missing items.
*The pattern is dep_z_iu* rather than dep* so that no other variable whose
*name happens to start with "dep" can enter the index.
egen DEP = rowmean(dep_z_iu*)


***Physical activity from wave 5
*Use the two items on hours/month vigorous training (z_iz171rer, z_iz174rer)
*Top code at 40 hrs/month (there are responses up to 800 hrs/month)
*z_iz165rer and z_iz168rer get the same cleaning but do not enter the index below
foreach item in z_iz165rer z_iz168rer z_iz171rer z_iz174rer {
    replace `item' = . if inrange(`item', -3, -1)
    replace `item' = 40 if inrange(`item', 41, 1000)
}

*Define physical activity as percentile rank of actual activity
*The missing option makes occ missing when BOTH items are missing; without it
*rowtotal() returns 0 and non-respondents would be ranked as the least active.
*NOTE(review): this changes the non-missing N of ACTIVITY relative to earlier runs.
egen occ = rowtotal(z_iz171rer z_iz174rer), missing
egen i = rank(occ), track

*With track, tied observations share the lowest rank of their group, so the
*top category has rank r(max). Dividing by r(max) - 1 maps the top category
*to exactly 1 without hard-coding how many respondents are tied there.
summarize i, meanonly
gen ACTIVITY = (i - 1)/(r(max) - 1)


***Remaining mediators: blank out negative codes first, then rescale
*  SRH   - self-rated health from wave 5                      (z_gx201re)
*  NEURO - neuroticism summary score from wave 5              (z_ih025rec)
*  EXTRA - extraversion summary score from wave 5             (z_ih001rec)
*  SWB   - psychological well-being summary score from wave 4 (z_rn014red)
*  RISK  - risk attitudes from wave 5                         (z_ig004rer)
*  CP    - cognitive ability, centile rank from the Henmon-Nelson test in junior high (z_ghncr_bm)

*Step 1: set negative codes to missing
*NOTE(review): the range of codes treated as missing differs by variable
*(-3..-1, -2..-1, -4..-1) - confirm each against the codebook.
foreach v in z_gx201re z_rn014red z_ghncr_bm {
    replace `v' = . if inrange(`v', -3, -1)
}
foreach v in z_ih025rec z_ih001rec {
    replace `v' = . if inrange(`v', -2, -1)
}
replace z_ig004rer = . if inrange(z_ig004rer, -4, -1)

*Step 2: linear rescaling (the divisors are presumably the scale ranges/maxima - TODO confirm)
gen SRH   = (5 - z_gx201re)/4     // item is flipped before scaling
gen NEURO = z_ih025rec/30
gen EXTRA = z_ih001rec/36
gen SWB   = z_rn014red/84
gen RISK  = (z_ig004rer - 1)/6
gen CP    = z_ghncr_bm/100

*Quick look at the distribution of EXTRA
sum EXTRA
tab EXTRA


*Keep the merge keys and the constructed mediators and store them in a temporary file
keep idpriv rtype DEP ACTIVITY SRH NEURO EXTRA SWB RISK CP
tempfile WLSMediators
*The tempfile macro is quoted because the temporary path may contain spaces
save "`WLSMediators'"



*Load the main WLS file and attach the mediators on idpriv + rtype
use "E:\ProjData\PGS and politics\WLSdata\WLS.dta", clear

*All observations are kept regardless of match status
merge 1:1 idpriv rtype using "`WLSMediators'"
drop _merge

rename edu_2004 EduYears



*Turnout measures
*Each is the row mean of two val_vote_* variables; rowmean() uses whichever
*of the two is non-missing, so one available year is enough.
egen Turnout1 = rowmean(val_vote_2008 val_vote_2012)
egen Turnout2 = rowmean(val_vote_2006 val_vote_2010)


*Ideology and extremism, both derived from ideology_2011
*Ideology: (x - 1)/6; Extremism: absolute distance from 4, divided by 3
gen Ideology  = (ideology_2011 - 1)/6
gen Extremism = abs(ideology_2011 - 4)/3


*Harmonize the birth-year name and drop observations without a family identifier
rename BYR Byear
drop if familypriv == .

*Retain identifiers, covariates, outcomes and mediators only
keep idpriv rtype familypriv                          ///
     Byear MALE EduYears                              ///
     Turnout1 Turnout2 Ideology Extremism             ///
     DEP ACTIVITY SRH NEURO EXTRA SWB RISK CP


*Attach the PGI repository file on idpriv + rtype; only matched observations are retained
merge 1:1 idpriv rtype using "E:\ProjData\PGS and politics\WLSdata\PGIrepo_idpriv\PGIrepo_v1.0_idpriv_shuffled.dta", keep(match) nogenerate


*Rename pgi_<source>single to <TARGET>_PGI.
*The list below is read as (source target) pairs.
local pgi_map ea EA  cp CP  adventure ADVENTURE  neuro NEURO  risk RISK       ///
              extra EXTRA  morning MORNING  dep DEP  height HEIGHT            ///
              selfhealth SRH  activity ACTIVITY  bmi BMI  swb SWB
while "`pgi_map'" != "" {
    gettoken src pgi_map : pgi_map
    gettoken dst pgi_map : pgi_map
    rename pgi_`src'single `dst'_PGI
}



*Standardize PGIs by birth year
*For every PGI, std<name> = (value - mean within Byear) / (sd within Byear).
*The raw PGI is left untouched; the helper columns are dropped after each pass.
local pgi_list EA_PGI ADVENTURE_PGI CP_PGI EXTRA_PGI MORNING_PGI NEURO_PGI RISK_PGI ///
               DEP_PGI HEIGHT_PGI SRH_PGI ACTIVITY_PGI BMI_PGI SWB_PGI
foreach pgi of local pgi_list {
    by Byear, sort: egen cohort_mean = mean(`pgi')
    by Byear, sort: egen cohort_sd   = sd(`pgi')
    gen std`pgi' = (`pgi' - cohort_mean)/cohort_sd
    drop cohort_mean cohort_sd
}



*Shorten the names of the 20 principal-component variables: pc#_PGI_shuffled -> PC#
forvalues k = 1/20 {
    rename pc`k'_PGI_shuffled PC`k'
}

*Drop observations with no value on the first principal component
drop if PC1 == .


*Standardize all mediators
*zscore is not defined in this file (presumably the user-written package - verify
*it is installed). The code relies on it creating z_<name>; that variable then
*replaces the raw one under the original name.
foreach med of varlist EduYears DEP ACTIVITY SRH NEURO EXTRA SWB RISK CP {
    zscore `med'
    drop `med'
    rename z_`med' `med'
}



*Final variable selection: outcomes, mediators, standardized and raw PGIs,
*covariates, the first ten PCs (PC1-PC10 is a positional range) and the family identifier
keep Turnout1 Turnout2 Ideology Extremism                                         ///
     DEP ACTIVITY SRH NEURO EXTRA SWB RISK CP                                     ///
     stdEA_PGI stdCP_PGI stdEXTRA_PGI stdNEURO_PGI stdRISK_PGI stdADVENTURE_PGI   ///
     stdMORNING_PGI stdSRH_PGI stdDEP_PGI stdSWB_PGI stdACTIVITY_PGI stdBMI_PGI   ///
     stdHEIGHT_PGI                                                                ///
     MALE Byear PC1-PC10 familypriv EduYears                                      ///
     EA_PGI ADVENTURE_PGI CP_PGI EXTRA_PGI MORNING_PGI NEURO_PGI RISK_PGI         ///
     DEP_PGI HEIGHT_PGI SRH_PGI ACTIVITY_PGI BMI_PGI SWB_PGI

*The family identifier becomes PairID; SchoolID is a constant 0 and sample is
*the string "wls" for every observation
rename familypriv PairID
gen SchoolID = 0
gen sample = "wls"

*Write the analysis dataset, overwriting any existing file
save "E:\ProjData\PGS and politics/WLSdata.dta", replace

